# -*- coding: utf-8 -*-
import requests
import urllib
import urllib.parse
from bs4 import BeautifulSoup
from datetime import datetime
import re
import json
import urllib.request
# Call the destroy-item GET endpoint first; per the original author's note
# this empties the server-side database before the scraped items are POSTed.
url3 = 'http://119.29.28.134:8000/destroy-item'
req1 = urllib.request.Request(url3)
# The context manager closes the connection even if read() raises, and the
# timeout keeps a stalled server from hanging the script indefinitely.
with urllib.request.urlopen(req1, timeout=30) as response1:
    html1 = response1.read()
print(html1.decode('utf-8'))
        

# Scrape the Baidu News result page(s) for the query encoded in the URL below
# and forward every hit (title, link, source, preview image) to the item store.

# Seconds to wait on any single HTTP call before giving up.
REQUEST_TIMEOUT = 30
# The original loop handled results 0..18 of each page; the cap is preserved.
MAX_ITEMS_PER_PAGE = 19
# Image stored when the article page offers no usable preview image.
DEFAULT_IMG = 'http://img.blog.csdn.net/20151020131005490?0.6499622281165232'
# Endpoint that receives one form-encoded POST per scraped item.
url4 = 'http://119.29.28.134:8000/itemStore'

for i in range(0, 1):
    # NOTE(review): this query string repeats several parameters and embeds
    # the page offset twice. It is kept byte-for-byte because how the server
    # resolves the duplicates cannot be verified from this file.
    url = 'http://news.baidu.com/ns?word=%E5%A4%A7%E5%AD%A6%E7%94%9F%E6%96%B0%E9%97%BB%20%E6%96%B0%E6%B5%AA&pn='+str(i*10)+'&cl=2&ct=1&tn=news&ie=utf-8&bt=0&et=02&ct=1&tn=news&ie=utf-8&bt=0&et=0'+str(i*10)+'&cl=2&ct=1&tn=news&rn=20&ie=utf-8&bt=0&et=0'
    headers = {
        'Connection': 'Keep-Alive',
        'Accept-Language': 'en-US,en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36',
        # The standard header name is "Referer"; the former "Refer" key is not
        # a header that servers recognise.
        'Referer': url,
        # No Accept-Encoding entry: "utf-8" is a charset, not a content
        # coding, so requests' own default is left in place.
    }
    # The headers were previously built but never sent; pass them explicitly.
    res = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    res.encoding = 'UTF-8'

    soup = BeautifulSoup(res.text, 'html.parser')
    # Run the selector once and slice, so a page with fewer hits than the cap
    # is processed in full instead of raising IndexError.
    links = soup.select('.c-title a')[:MAX_ITEMS_PER_PAGE]
    for link in links:
        title = link.text
        url2 = link.get('href')
        if not url2:
            # An anchor without a target cannot be fetched or stored.
            continue
        print("标题:",title)
        print("链接:",url2)

        try:
            res2 = requests.get(url2, headers=headers, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # One unreachable article must not abort the whole run, because
            # the database has already been cleared at this point.
            print("抓取失败:",url2,exc)
            continue
        res2.encoding = 'UTF-8'
        soup2 = BeautifulSoup(res2.text, 'html.parser')

        # Publisher name; falls back to a fixed label when the element is absent.
        source_links = soup2.select('#navtimeSource span a')
        source = source_links[0].text if source_links else "新浪新闻"
        print("来源:",source)

        # Preview image: the first '.post_text' image when that container
        # holds more than one, otherwise the first '.img_wrapper' image.
        post_imgs = soup2.select('.post_text img')
        try:
            if len(post_imgs) > 1:
                img = post_imgs[0]['src']
                print(img)
            else:
                img = soup2.select('.img_wrapper img')[0]['src']
                print("预览图:",img)
        except (IndexError, KeyError):
            # IndexError: no matching <img>; KeyError: <img> has no src.
            img = DEFAULT_IMG
            print("无预览图:",img)

        # Send the four fields title, url, from and img to the server so the
        # item is stored in its database.
        postdata = {
            "title": title,
            "url": url2,
            "from": source,
            "img": img,
        }
        data = urllib.parse.urlencode(postdata).encode('utf-8')
        req2 = urllib.request.Request(url4, data)
        # Close the connection deterministically once the reply has been read.
        with urllib.request.urlopen(req2, timeout=REQUEST_TIMEOUT) as response2:
            html2 = response2.read()
        print(html2.decode('utf-8'))
        print()



